##############################################################
# REPLICATION CODE: "Are coups good for democracy?"          #      
# PURPOSE: Produce repression_monthly.csv from open-access   #
# primary sources. That is the data file used for Figure 3   #
# of the main text, and Figure C-5 of the Online Appendix.   # 
# AUTHOR: George Derpanopoulos (gderpa@ucla.edu)             #
##############################################################

##############
##############
## PREAMBLE ##
##############
##############

# rm(list=ls())  # clear working space (recommended)
# NOTE: every input file below is read with a relative path, and the final .csv is written
# with a relative path, so the working directory must contain all raw data files.
setwd('~/Dropbox/R&P submission/replication files/repression_monthly')  # set working directory (adjust by user)

## Load packages (install any not installed already)
library(XLConnect)         # for importing Excel files
library(countrycode)       # for converting country codes
library(chron)             # for manipulating dates
# Must be set before any chron date is converted to text: later code extracts the 4-digit
# year from printed chron dates by character position.
options(chron.year.abb=FALSE)  # don't abbreviate year to last 2 digits; otherwise problem w. dates from diff. centuries
library(plyr)              # for manipulating data


##########################
##########################
## PREP & COUNTRY CODES ##
##########################
##########################

#######
# WAD #
#######

## Import data
old <- "pitf.world.19950101-20121231.xls"  # 1995-2012 events file (http://eventdata.parusanalytics.com/data.dir/atrocities.html)
newer <- "pitf.world.20130101-20150131.xls"  # latest events file as of 5/15/2015 (http://eventdata.parusanalytics.com/data.dir/atrocities.html)

# Both workbooks are read the same way: first sheet, skipping the two meta-header rows
# and dropping the empty last column.
read.wad.sheet <- function(path){
  readWorksheetFromFile(file=path, sheet=1, startRow=3, startCol=1, endCol=73)
}
wad.old <- read.wad.sheet(old)
wad.newer <- read.wad.sheet(newer)
wad <- data.frame(rbind(wad.old, wad.newer))  # stack into the 1995-2015 WAD dataset

## Recode country codes w. typos / multiple spellings
# unique(wad$Country)  # review country codes
wad$Country[wad$Country %in% c("\nIRQ", "IRQ ")] <- "IRQ"  # Iraq
wad$Country[wad$Country %in% "\nYEM"] <- "YEM"  # Yemen
wad$Country[wad$Country %in% "SYR "] <- "SYR"  # Syria
wad$Country[wad$Country %in% c("Somalia", "SOM ")] <- "SOM"  # Somalia
wad$Country[wad$Country %in% "Nigeria"] <- "NGA"  # Nigeria
wad$Country[wad$Country %in% c("South Sudan", "South Sudan ")] <- "SSD"  # South Sudan

## Recode problematic country codes. (WAD country codes are mix between iso3-char format & arbitrary codes.)
# Step 1: translate every code as if it were ISO3; codes that are not ISO3 come back as NA.
wad$cowc <- countrycode(sourcevar=wad$Country, origin='iso3c', destination='cowc')  # convert country codes to cow-char format
# unique(wad[is.na(wad$cowc),'Country']) # review non-matching country codes
# Step 2: hand-assign the COW code for every WAD code that step 1 misses or mis-assigns.
# Each line targets a distinct WAD code, so the order of the lines does not matter.
wad$cowc[wad$Country=='ALG'] <- 'ALG'  # Algeria (didn't use iso3c in all entries)
wad$cowc[wad$Country=='SUD'] <- 'SUD'  # Sudan (didn't use iso3c in all entries)
wad$cowc[wad$Country=='SCG'] <- 'YUG'  # Serbia & Montenegro (= Yugoslavia in COW)
wad$cowc[wad$Country=='ESH'] <- NA     # Western Sahara (not independent state)
wad$cowc[wad$Country=='TMP'] <- 'ETM'  # Timor-Leste (= East Timor in COW)
wad$cowc[wad$Country=='PRI'] <- 'USA'  # Puerto Rico (not independent state)
wad$cowc[wad$Country=='NGR'] <- 'NIG'  # Nigeria (inferred from entries; didn't use iso3c in all entries)
wad$cowc[wad$Country=='GZS'] <- 'ISR'  # Gaza (inferred from entries; not independent state; = Israel in COW)
wad$cowc[wad$Country=='CAR'] <- 'CEN'  # Cen Afr Rep (didn't use iso3c in all entries)
wad$cowc[wad$Country=='PSE'] <- 'ISR'  # Palestine (inferred from entries; not independent state; = Israel in COW)
wad$cowc[wad$Country=='THL'] <- 'THI'  # Thailand (inferred from entries; didn't use iso3c in all entries)
wad$cowc[wad$Country=='BRZ'] <- 'BRA'  # Brazil (inferred from entries; didn't use iso3c in all entries)
# FIX: this line used to assign 'SUD', which silently added the 'BAH' events to Sudan's counts.
# 'BHM' is the COW code for the Bahamas.
# NOTE(review): if the 'BAH' rows actually describe Bahrain, the COW code is 'BAH' -- confirm against the entries.
wad$cowc[wad$Country=='BAH'] <- 'BHM'  # Bahamas (inferred from entries; didn't use iso3c in all entries)
wad$cowc[wad$Country=='ELS'] <- 'SAL'  # El Salvador (inferred from entries; didn't use iso3c in all entries)


########
# SCAD #  
########

scad.afr <- read.csv('SCAD 3.1 (For Public Release).csv')  # latest Africa file as of 05/15/2015 (https://www.strausscenter.org/scad.html)
scad.latam <- read.csv('Latin America_FINAL.csv')  # latest Latin America file as of 05/15/2015 (https://www.strausscenter.org/scad.html)

## Give the LatAm columns the names used in the Africa file, so the two can be row-bound
names(scad.latam) <- local({
  latam.to.africa <- c(endday='eday',
                       endmo='emo',
                       endyr='eyr',
                       GISlocnum='gislocnum',
                       acdquestionable='acd_questionable')
  col.names <- names(scad.latam)
  to.rename <- col.names %in% names(latam.to.africa)
  col.names[to.rename] <- latam.to.africa[col.names[to.rename]]
  col.names  # columns not listed in the mapping keep their original name
})

scad <- rbind(scad.afr, scad.latam)  # stack regional datasets

# Add cow-char country code (from the cow-num code); key for the later merges
scad$cowc <- countrycode(scad$ccode, origin='cown', destination='cowc')


######
# PT #
######

# Latest Powell & Thyne Global Instances of Coups dataset as of 05/15/2015
# (http://www.uky.edu/~clthyn2/coup_data/powell_thyne_coups_final.txt)
pt <- read.table('powell_thyne_coups_final.txt', header=TRUE)

# Add cow-char country code (from the cow-num code); key for the later merges
pt[['cowc']] <- countrycode(pt[['ccode']], origin='cown', destination='cowc')


#######
# GWF #
#######

# Latest Geddes, Wright & Frantz Autocratic Regimes dataset as of 05/15/2015 (private correspondence)
gwf <- readWorksheetFromFile('GWF Autocratic Regimes 2015 Update.xlsx', sheet=1)

# Add cow-char country code (from the cow-num code); key for the later merges
gwf[['cowc']] <- countrycode(gwf[['cowcode']], origin='cown', destination='cowc')



#########################
#########################
## COUNTRY-MONTH PANEL ##
#########################
#########################

###############
# EMPTY PANEL #
###############

## Create country-month rectangle
# Countries kept: those with a coup after 1989 (PT) that also have an autocratic regime
# ending after 1989 or still ongoing (GWF end year missing); for creating balanced monthly panel.
coup.countries <- unique(pt$cowc[pt$year>1989])
autocracies <- unique(gwf$cowc[is.na(gwf$gwf_endyr) | gwf$gwf_endyr>1989])
cowc <- intersect(coup.countries, autocracies)

year <- rep(1990:2014, each=12)  # every year of [1990,2014], repeated once per month
month <- rep(sprintf("%02d", 1:12), times=length(unique(year)))  # "01".."12", repeated once per year
yearmonth <- paste(year, month, sep='-')  # "yyyy-mm" labels

# Cross every country with every year-month to get the balanced panel
rack <- setNames(expand.grid(cowc, yearmonth), c("cowc", "yearmonth"))
rack$year <- substr(rack$yearmonth, 1, 4)   # year part of "yyyy-mm"
rack$month <- substr(rack$yearmonth, 6, 7)  # month part of "yyyy-mm"
rack <- rack[with(rack, order(cowc, year, month)), ]  # country-year-month order


#######
# WAD #
#######

# Helper: TRUE where x equals any of the given labels. Built from `==` so that a missing x
# stays NA (and is not turned into FALSE).
equals.any <- function(x, labels){
  Reduce(`|`, lapply(labels, function(lbl) x == lbl))
}

# Create year-month variable using start dates of events; for merge.
# Months 1-9 appear both as "01" and as "1"; only the unpadded ones get a leading zero.
wad$yearmonth <- paste0(wad$Start.Year,
                        '-',
                        ifelse(substr(wad$Start.Month, 1, 1)!="0" &
                                 as.numeric(wad$Start.Month)<10,
                               paste0("0", wad$Start.Month),
                               wad$Start.Month))

wad$deathnum <- as.numeric(wad$Deaths.Number)  # numeric death count; non-numeric (ambiguous) counts become NA

## Create dummies for temporal nature of events (labels include the spelling variants found in the data)
wad$incident <- ifelse(equals.any(wad$Event.Type, c("incident", "Incident", "Incident ")), 1, 0)
wad$campaign <- ifelse(equals.any(wad$Event.Type, c("Campaign", "Campaign ")), 1, 0)

# Create dummy for events by state/-sanctioned perpetrator
# NOTE(review): 'Non-State, Internal, No State, Sanction' reads like "no state sanction" yet is
# counted as state-sanctioned here -- confirm this is intended.
wad$state <- ifelse(equals.any(wad$Perp.State.Role,
                               c('Multiple Perpetrators (State)',
                                 'Non-State, Internal, No State, Sanction',
                                 'Non-State, Internal, State Sanction',
                                 'State Perpertrator',
                                 'State Perpetrator',
                                 'State Perpetrator ')),
                    1, 0)

# Create country-month WAD panel: per country-month, total deaths and number of
# incidents/campaigns among events flagged as state(-sanctioned).
wad.mo <- ddply(wad, c("cowc", "yearmonth"), summarise,
                wad_deaths.state = sum(deathnum[state==1], na.rm=TRUE),
                wad_incidents.state = sum(incident[state==1], na.rm=TRUE),
                wad_campaigns.state = sum(campaign[state==1], na.rm=TRUE))

# Left-merge into the empty country-month panel (shared columns: cowc, yearmonth)
dat.mo <- merge(rack, wad.mo, by=c("cowc", "yearmonth"), all.x=TRUE)


########
# SCAD #
########

## Create year-month variable using start dates of events; for merge (single-digit months get a leading zero)
scad$yearmonth <- paste0(scad$styr,
                         '-',
                         ifelse(scad$stmo<10, paste0("0", scad$stmo), scad$stmo))

# Negative death counts are "unknown" codes (-99, -88, -77); set them to NA
scad$ndeath[which(scad$ndeath<0)] <- NA

## Create dummies for temporal nature of events
# NOTE(review): only the month numbers are compared, so an event ending in the same calendar
# month of a later year is still flagged as an incident -- confirm this is acceptable.
scad$incident <- as.numeric(scad$stmo==scad$emo)  # incident: starts and ends in same month number
scad$campaign <- 1 - scad$incident                # campaign: every other event

# Country-month SCAD panel: per country-month, total deaths and number of
# incidents/campaigns among events with etype 7.
scad.mo <- ddply(scad, c("cowc", "yearmonth"), summarise,
                 scad_deaths.state = sum(ndeath[etype==7], na.rm=TRUE),
                 scad_incidents.state = sum(incident[etype==7]==1, na.rm=TRUE),
                 scad_campaigns.state = sum(campaign[etype==7]==1, na.rm=TRUE))

# Left-merge into country-month panel (shared columns: cowc, yearmonth)
dat.mo <- merge(dat.mo, scad.mo, by=c("cowc", "yearmonth"), all.x=TRUE)


######
# PT #
######

# Create year-month variable; for merge (single-digit months get a leading zero)
pt$yearmonth <- paste0(pt$year,
                       '-',
                       ifelse(pt$month<10, paste0("0", pt$month), pt$month))

# Keep only the merge keys and the coup code
pt.merge <- data.frame(cowc=pt$cowc, yearmonth=pt$yearmonth, coup=pt$coup)

# Drop exact duplicates, i.e. >1 coup with the same code (success/failed) in the same country-month.
# Redundant at monthly resolution (no country-month in our sample has coups of different type in same month).
pt.merge <- unique(pt.merge)

pt.merge$coupID <- id(pt.merge)  # ID var for each retained coup row; for generating coup variables

# Left-merge into country-month panel (shared columns: cowc, yearmonth)
dat.mo <- merge(dat.mo, pt.merge, by=c("cowc", "yearmonth"), all.x=TRUE)


#######
# GWF #
#######

# Helper: entries formatted by the excel import as yyyy-dd-mm 00:00:00 (recognized by a '-' in
# 5th position) are rewritten as dd/mm/yyyy (the format of most entries); others are untouched.
excel.to.dmy <- function(dates){
  hits <- which(substr(dates, 5, 5)=='-')
  dates[hits] <- paste(substr(dates[hits], 6, 7),
                       substr(dates[hits], 9, 10),
                       substr(dates[hits], 1, 4),
                       sep='/')
  dates
}

# Helper: entries coded as dd/m/yyyy (recognized by a '/' in 5th position) get a '0' inserted
# before the month, giving dd/mm/yyyy; others are untouched.
pad.month <- function(dates){
  hits <- which(substr(dates, 5, 5)=='/')
  dates[hits] <- paste0(substr(dates[hits], 1, 3),
                        '0',
                        substr(dates[hits], 4, 9))
  dates
}

# Start dates: excel-format fix first, then month padding
gwf$gwf_startdate <- pad.month(excel.to.dmy(gwf$gwf_startdate))

gwf$gwf_startdate[gwf$gwf_casename=='Mali 12-13'] <- '22/03/2012'  # recode latest mali regime (coded as yyyy/mm/dd, so the recoding above does not handle it correctly)

# End dates: same two steps
gwf$gwf_enddate <- pad.month(excel.to.dmy(gwf$gwf_enddate))

## Create regime-month panel ID variable
# For every regime case, build a monthly sequence of dates running from the regime's start date
# to its end date. Both dates are parsed as day/month/year, so they must already be in
# dd/mm/yyyy form at this point.
mo.ts <- ddply(gwf, .variables='gwf_casename', .fun=function(x){  # by regime case
  mo.ts <- data.frame()  
  # convert to chron object to create monthly sequences 
  case.months <- data.frame(yearmonth=seq(from=chron(x$gwf_startdate, format='d/m/y'), 
                                          to=chron(x$gwf_enddate, format='d/m/y'),  
                                          by="month"))
  # the value of this last assignment is what the function returns for the case
  mo.ts <- rbind(mo.ts, case.months) 
}
)

# merge() is called without `by`, so it joins on all column names the two frames share
# (presumably only gwf_casename -- confirm gwf has no column called yearmonth).
gwf.mo <- merge(x=mo.ts, y=gwf, all.x=TRUE)  # left merge main dataframe into regime-month panel ID variable; expands main dataframe to produce regime-month panel

# Create start year-month variable ("yyyy-mm", taken from the dd/mm/yyyy start date); for later use
gwf.mo$gwf_startyearmonth <- paste(substr(x=gwf.mo$gwf_startdate, start=7, stop=10),
                                   substr(x=gwf.mo$gwf_startdate, start=4, stop=5),
                                   sep='-')

# Create end year-month variable ("yyyy-mm", taken from the dd/mm/yyyy end date); for later use
gwf.mo$gwf_endyearmonth <- paste(substr(x=gwf.mo$gwf_enddate, start=7, stop=10),
                                 substr(x=gwf.mo$gwf_enddate, start=4, stop=5),
                                 sep='-')

# Convert year-month variable to yyyy-mm format; for merge
gwf.mo$yearmonth <- paste(substr(x=gwf.mo$yearmonth, start=7, stop=10),
                          substr(x=gwf.mo$yearmonth, start=4, stop=5),
                          sep='-')

## Drop 1st month of new regime when regime transition back-to-back (duplicate created by expanding gwf into monthly panel)
# Within a country-month the regime that started EARLIER must come first, because duplicated()
# keeps the first row. gwf_startdate is a dd/mm/yyyy string, so sorting on it directly compares
# the day-of-month first and is not chronological (e.g. "03/05/1995" sorts before "25/12/1980").
# FIX: sort on a yyyymmdd key rebuilt from the same string instead.
gwf.mo <- gwf.mo[order(gwf.mo$cowc,
                       gwf.mo$yearmonth,
                       paste0(substr(gwf.mo$gwf_startdate, 7, 10),
                              substr(gwf.mo$gwf_startdate, 4, 5),
                              substr(gwf.mo$gwf_startdate, 1, 2))), ]
gwf.mo <- gwf.mo[!duplicated(gwf.mo[, c("yearmonth", "cowc")]), ]  # keep one row (the older regime) per country-month

# merge() is called without `by`, so it joins on all column names shared by the two frames.
dat.mo <- merge(x=dat.mo, y=gwf.mo, all.x=TRUE)  # left-merge into country-month panel



####################
####################
## FINALIZE MERGE ##
####################
####################

###########
# GENERAL #
###########

## Numeric year & month, split out of the "yyyy-mm" label; for easier sorting & subsetting
dat.mo$year <- as.numeric(substring(dat.mo$yearmonth, 1, 4))   # characters 1-4 = year
dat.mo$month <- as.numeric(substring(dat.mo$yearmonth, 6, 7))  # characters 6-7 = month


######################
# VIOLENCE VARIABLES #
######################

# Function: creates count from similar wad and scad vars; gives wad values for 1995 onwards,
# and scad values for earlier years.
# Args:
#   x.wad:  vector of WAD-based values, one per panel row
#   x.scad: vector of SCAD-based values, one per panel row
#   year:   numeric year of each panel row; defaults to dat.mo$year (looked up only when the
#           argument is not supplied), which is what the function always used before
# Returns: vector of the same length as `year`; NA where `year` is NA.
agg.count <- function(x.wad, x.scad, year=dat.mo$year){
  # Guard against silent recycling / missing columns (a NULL column has length 0)
  stopifnot(length(x.wad)==length(year), length(x.scad)==length(year))
  ifelse(year>=1995, x.wad, x.scad)
}

## Create aggregate counts of events/deaths
# Deaths by state perpetrators; country-months with no recorded events (NA) count as 0
dat.mo$deaths.state <- agg.count(dat.mo$wad_deaths.state, dat.mo$scad_deaths.state)
dat.mo$deaths.state <- replace(dat.mo$deaths.state, is.na(dat.mo$deaths.state), 0)

# Event counts; only used as the denominator of w8.deaths.state
incidents.state <- agg.count(dat.mo$wad_incidents.state, dat.mo$scad_incidents.state)
campaigns.state <- agg.count(dat.mo$wad_campaigns.state, dat.mo$scad_campaigns.state)

# Deaths by state perpetrators weighed by no. incidents+campaigns from which they arise / are reported.
# NA (no events) and NaN (0/0) are both set to 0 below.
# NOTE(review): a positive death count over zero counted events would give Inf, which is not reset.
dat.mo$w8.deaths.state <- dat.mo$deaths.state/(incidents.state + campaigns.state)
dat.mo$w8.deaths.state <- replace(dat.mo$w8.deaths.state, is.na(dat.mo$w8.deaths.state), 0)


#########################
# REGIME-TYPE VARIABLES #
#########################

dat.mo$democracy <- ifelse(is.na(dat.mo$gwf_regimetype), 1, 0)  # create democracy dummy (if GWF has no info = democracy)

# Create dictatorship-democracy transition dummy
# Per country: collect the end year-months of rows whose gwf_subsreg equals 1, then flag the
# panel rows whose yearmonth is one of them.
# `demo` is a zero-length vector; it adds no column to the cbind() result and only makes the
# function return a one-column matrix named dict2dem.transition.
# NOTE(review): the result is copied back by position, which assumes dat.mo rows are already
# grouped by cowc in the order ddply returns them -- confirm.
dict2dem.transition <- ddply(dat.mo, .variables='cowc', .fun=function(x){
  demo <- numeric()
  dict2dem.transition.yearmonths <- unique(x$gwf_endyearmonth[x$gwf_subsreg==1])
  dict2dem.transition <- ifelse(x$yearmonth %in% dict2dem.transition.yearmonths, 1, 0)
  return(cbind(demo, dict2dem.transition))
})
dat.mo$dict2dem.transition <- dict2dem.transition$dict2dem.transition

# Create dictatorship-dictatorship transition dummy
# Same construction as above, using rows whose gwf_subsreg equals 2; `auto` plays the role of
# `demo`, and the same row-order assumption applies.
dict2dict.transition <- ddply(dat.mo, .variables='cowc', .fun=function(x){
  auto <- numeric()
  dict2dict.transition.yearmonths <- unique(x$gwf_endyearmonth[x$gwf_subsreg==2])
  dict2dict.transition <- ifelse(x$yearmonth %in% dict2dict.transition.yearmonths, 1, 0)
  return(cbind(auto, dict2dict.transition))
})
dat.mo$dict2dict.transition <- dict2dict.transition$dict2dict.transition


##################
# COUP VARIABLES #
##################

# Country-months without a PT entry have no coup: replace NA w. 0 (PT is event dataset)
dat.mo$coup <- replace(dat.mo$coup, is.na(dat.mo$coup), 0)

## Recodings
not.in.scad <- c('AFG', 'AZE', 'IRQ', 'PHI', 'THI')  # countries w. coups in 90-94 that are not covered by SCAD (i.e. not in Africa or LatAm)
pre95.without.scad <- dat.mo$year<1995 & dat.mo$cowc %in% not.in.scad
dat.mo$coup[pre95.without.scad] <- 0  # nullify pre-95 coups from above countries

# Individual events not treated as coups:
#   LES 1994-08: "royal coup" (Archigos, 531) after 5 months of democracy (GWF, 74); doesn't capture coup info
#   UKR 2014-02: clearly not a coup
not.a.coup <- (dat.mo$cowc=='LES' & dat.mo$yearmonth=='1994-08') |
  (dat.mo$cowc=='UKR' & dat.mo$yearmonth=='2014-02')
dat.mo$coup[not.a.coup] <- 0

dat.mo$failed.coup <- as.numeric(dat.mo$coup==1)   # failed coup dummy (coup code 1)
dat.mo$success.coup <- as.numeric(dat.mo$coup==2)  # successful coup dummy (coup code 2)

# Function: creates ID variable for months from closest coup of chosen type
# Args:
#   coup.type: name (character) of a 0/1 dummy column marking coup months
#   window:    half-width of the window around a coup, in panel rows (default 12)
#   data:      panel with a `cowc` column and the `coup.type` column, rows in time order within
#              country; defaults to dat.mo (looked up only when the argument is not supplied)
# Returns: numeric vector with one value per row of `data`, in the row order of `data`:
#   row position minus position of the closest coup in the same country (negative before the
#   coup, 0 in the coup month, positive after), or NA when no coup lies within `window` rows.
# FIX: when several coups fell inside the window, the old code tried to store all distances in
# one cell (R warned and kept the distance to the FIRST coup); the closest coup is now used.
# On a tie the earlier coup wins, as before.
months2.coup <- function(coup.type, window=12, data=dat.mo){  # NOTE: enter coup type as character
  stopifnot(is.character(coup.type), length(coup.type)==1, coup.type %in% names(data))

  # Signed distance to the closest coup for the rows of one country
  closest.gap <- function(is.coup){
    coups <- which(is.coup==1)  # NA dummies are ignored by which()
    mo2coup <- rep(NA_real_, length(is.coup))
    for(i in seq_along(is.coup)){
      gaps <- i - coups
      gaps <- gaps[abs(gaps)<=window]
      if(length(gaps)>0){
        mo2coup[i] <- gaps[which.min(abs(gaps))]
      }
    }
    mo2coup
  }

  # Work country by country and write results back to the original row positions, so the
  # output lines up with `data` whatever its sort order. exclude=NULL keeps rows with a
  # missing country code together as their own group.
  out <- rep(NA_real_, nrow(data))
  rows.by.country <- split(seq_len(nrow(data)), factor(data$cowc, exclude=NULL))
  for(rows in rows.by.country){
    out[rows] <- closest.gap(data[[coup.type]][rows])
  }
  out
}


################
# FAILED COUPS #
################

# ID variable for months from closest failed coup
dat.mo$months2.failed.coup <- months2.coup('failed.coup')


###################################
# DICTATORSHIP TO DEMOCRACY COUPS #
###################################

# Dict-dem coup dummy: successful coup (code 2) in a dictatorship-democracy transition month; verified cases (ACW15)
dat.mo$dict2dem.coup <- as.numeric(dat.mo$coup==2 & dat.mo$dict2dem.transition==1)

## NOTE: coding misses democratizations that happened shortly after coup but can be attributed ex-post to it.
## Verified cases from ACW15 are switched on by hand: NIR 1999-04, NIR 2010-02, SIE 1996-01.
verified.dict2dem <- (dat.mo$cowc=='NIR' & dat.mo$yearmonth %in% c('1999-04', '2010-02')) |
  (dat.mo$cowc=='SIE' & dat.mo$yearmonth=='1996-01')
dat.mo$dict2dem.coup[verified.dict2dem] <- 1  # verified (ACW15)

# ID variable for months from closest dict-dem coup
dat.mo$months2.dict2dem.coup <- months2.coup('dict2dem.coup')


######################################
# DICTATORSHIP TO DICTATORSHIP COUPS #
######################################

# Dict-dict coup dummy: successful coup (code 2) in a dictatorship-dictatorship transition month; verified cases (ACW15)
dat.mo$dict2dict.coup <- as.numeric(dat.mo$coup==2 & dat.mo$dict2dict.transition==1)

## Recodings
is.nir.2010.02 <- dat.mo$cowc=='NIR' & dat.mo$yearmonth=='2010-02'
dat.mo$dict2dict.coup[is.nir.2010.02] <- 0  # verified

# ID variable for months from closest dict-dict coup
dat.mo$months2.dict2dict.coup <- months2.coup('dict2dict.coup')


#####################
# RESHUFFLING COUPS #
#####################

# Reshuffling coup dummy: successful coup (code 2) in a month that is neither the first nor the
# last month of the regime in place; verified cases (ACW15).
# The value is NA for successful coups in rows with no regime start/end month.
not.regime.boundary <- dat.mo$yearmonth!=dat.mo$gwf_startyearmonth &
  dat.mo$yearmonth!=dat.mo$gwf_endyearmonth
dat.mo$reshuffling.coup <- as.numeric(dat.mo$coup==2 & not.regime.boundary)

## Recodings: these two months are hand-coded as dict2dem coups, so they are not reshuffling coups
hand.coded.dict2dem <- (dat.mo$cowc=='NIR' & dat.mo$yearmonth=='1999-04') |
  (dat.mo$cowc=='SIE' & dat.mo$yearmonth=='1996-01')
dat.mo$reshuffling.coup[hand.coded.dict2dem] <- 0

# ID variable for months from closest reshuffling coup
dat.mo$months2.reshuffling.coup <- months2.coup('reshuffling.coup')

###################################################
# Written to the working directory. write.csv is called with its default row.names setting,
# so the file starts with an unnamed column holding the row names of dat.mo.
write.csv(dat.mo, file='repression_monthly.csv')  # save final monthly panel in .csv format